@aiello/wechat-to-markdown 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,105 @@
1
+ # wechat-to-markdown
2
+
3
+ Forked from https://github.com/LuckyRyan-web/wechat-to-markdown
4
+
5
+ This fork adds some bug fixes.
6
+
7
+ ## description
8
+
9
+ Pass in the URL of a WeChat Official Account article and get its content back converted to Markdown.
10
+
11
+ English | [简体中文](README.zh-CN.md)
12
+
13
+ ## Return value
14
+
15
+ ```ts
16
+ interface TurnDownResult {
17
+ success: boolean
18
+ code: number
19
+ data?: {
20
+ title?: string
21
+ author?: string
22
+ content?: string
23
+ }
24
+ msg?: string
25
+ }
26
+
27
+ ```
28
+
29
+ ## Basic Usage
30
+
31
+ ## CommonJs
32
+
33
+ ```javascript
34
+ const transformHtml2Markdown = require('@aiello/wechat-to-markdown').default
35
+
36
+ setTimeout(async () => {
37
+ const articleData = await transformHtml2Markdown('https://mp.weixin.qq.com/s/9d5DWg7YdMHPvVl-2KLH2w')
38
+
39
+ const { title, author, content } = articleData.data
40
+
41
+ console.log('title', title)
42
+ console.log('author', author)
43
+ console.log('content', content)
44
+ }, 0)
45
+ ```
46
+
47
+ ## vue3
48
+ **index.ts**
49
+ ```javascript
50
+ import transformHtml2Markdown from '@aiello/wechat-to-markdown'
51
+
52
+ setup() {
53
+ const getData = async () => {
54
+ const articleData = await transformHtml2Markdown(
55
+ '/api/s/9d5DWg7YdMHPvVl-2KLH2w'
56
+ )
57
+
58
+ const { title, author, content } = articleData.data
59
+
60
+ console.log('title', title)
61
+ console.log('author', author)
62
+ console.log('content', content)
63
+ }
64
+
65
+ getData()
66
+
67
+ return {}
68
+ },
69
+ ```
70
+
71
+ **vite.config.ts**
72
+
73
+ ```js
74
+ ...
75
+ server: {
76
+ proxy: {
77
+ '/api': {
78
+ target: 'https://mp.weixin.qq.com',
79
+ changeOrigin: true,
80
+ rewrite: (path) => path.replace(/^\/api/, ''),
81
+ },
82
+ },
83
+ },
84
+ ...
85
+ ```
86
+
87
+
88
+ ## Packages
89
+
90
+ [axios](http://www.axios-js.com/)
91
+
92
+ [cheerio](https://github.com/cheeriojs/cheerio)
93
+
94
+ [turndown (html to markdown)](https://github.com/mixmark-io/turndown)
95
+
96
+ [turndown-plugin-gfm (turndown parsing partial html element plugin)](https://github.com/mixmark-io/turndown-plugin-gfm)
97
+
98
+ ## Reference
99
+
100
+ [html2md (A very well developed url to markdown project)](https://github.com/helloworld-Co/html2md)
101
+
102
+
103
+ ## TODO
104
+
105
+ 1. Support rewriting image URLs in the converted Markdown to CDN-hosted copies (WeChat restricts images by domain, so they cannot be loaded from non-WeChat sites).
@@ -0,0 +1,98 @@
1
+ ## 说明
2
+
3
+ 输入微信公众号地址,将其转换为 markdown 格式
4
+
5
+ [English](README.md) | 简体中文
6
+
7
+ ## 返回属性
8
+
9
+ ```ts
10
+ interface TurnDownResult {
11
+ success: boolean
12
+ code: number
13
+ data?: {
14
+ title?: string
15
+ author?: string
16
+ content?: string
17
+ }
18
+ msg?: string
19
+ }
20
+
21
+ ```
22
+ ## 基本用法
23
+
24
+ ## CommonJs
25
+
26
+ ```javascript
27
+ const transformHtml2Markdown = require('@aiello/wechat-to-markdown').default
28
+
29
+ setTimeout(async () => {
30
+ const articleData = await transformHtml2Markdown('https://mp.weixin.qq.com/s/9d5DWg7YdMHPvVl-2KLH2w')
31
+
32
+ const { title, author, content } = articleData.data
33
+
34
+ console.log('title', title)
35
+ console.log('author', author)
36
+ console.log('content', content)
37
+ }, 0)
38
+ ```
39
+
40
+ ## vue3
41
+ **index.ts**
42
+
43
+ ```javascript
44
+ import transformHtml2Markdown from '@aiello/wechat-to-markdown'
45
+
46
+ setup() {
47
+ const getData = async () => {
48
+ const articleData = await transformHtml2Markdown(
49
+ '/api/s/9d5DWg7YdMHPvVl-2KLH2w'
50
+ )
51
+
52
+ const { title, author, content } = articleData.data
53
+
54
+ console.log('title', title)
55
+ console.log('author', author)
56
+ console.log('content', content)
57
+ }
58
+
59
+ getData()
60
+
61
+ return {}
62
+ },
63
+ ```
64
+
65
+ **vite.config.ts**
66
+
67
+ ```js
68
+ ...
69
+ server: {
70
+ proxy: {
71
+ '/api': {
72
+ target: 'https://mp.weixin.qq.com',
73
+ changeOrigin: true,
74
+ rewrite: (path) => path.replace(/^\/api/, ''),
75
+ },
76
+ },
77
+ },
78
+ ...
79
+ ```
80
+
81
+
82
+ ## 使用到的包
83
+
84
+ [axios (著名的请求库)](http://www.axios-js.com/)
85
+
86
+ [cheerio (可以用 JQuery 方式操作 html 源码)](https://github.com/cheeriojs/cheerio)
87
+
88
+ [turndown (html to markdown)](https://github.com/mixmark-io/turndown)
89
+
90
+ [turndown-plugin-gfm (turndown 解析部分 html 元素插件)](https://github.com/mixmark-io/turndown-plugin-gfm)
91
+
92
+ ## 参考
93
+
94
+ [html2md (一个非常完善的 url to markdown 项目)](https://github.com/helloworld-Co/html2md)
95
+
96
+ ## 待做事项
97
+
98
+ 1. 需要支持转化后的 markdown 代码可以替换图片地址到 cdn(微信的图片会有域名限制,非 weixin 域名不可以访问图片)
package/dist/index.cjs ADDED
@@ -0,0 +1,182 @@
1
+ var __create = Object.create;
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __getProtoOf = Object.getPrototypeOf;
6
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
7
+ var __markAsModule = (target) => __defProp(target, "__esModule", { value: true });
8
+ var __export = (target, all) => {
9
+ __markAsModule(target);
10
+ for (var name in all)
11
+ __defProp(target, name, { get: all[name], enumerable: true });
12
+ };
13
+ var __reExport = (target, module2, desc) => {
14
+ if (module2 && typeof module2 === "object" || typeof module2 === "function") {
15
+ for (let key of __getOwnPropNames(module2))
16
+ if (!__hasOwnProp.call(target, key) && key !== "default")
17
+ __defProp(target, key, { get: () => module2[key], enumerable: !(desc = __getOwnPropDesc(module2, key)) || desc.enumerable });
18
+ }
19
+ return target;
20
+ };
21
+ var __toModule = (module2) => {
22
+ return __reExport(__markAsModule(__defProp(module2 != null ? __create(__getProtoOf(module2)) : {}, "default", module2 && module2.__esModule && "default" in module2 ? { get: () => module2.default, enumerable: true } : { value: module2, enumerable: true })), module2);
23
+ };
24
+
25
+ // src/index.ts
26
+ __export(exports, {
27
+ Status: () => Status,
28
+ default: () => transformHtml2Markdown
29
+ });
30
+
31
+ // node_modules/tsup/assets/cjs_shims.js
32
+ var importMetaUrlShim = typeof document === "undefined" ? new (require("url")).URL("file:" + __filename).href : document.currentScript && document.currentScript.src || new URL("main.js", document.baseURI).href;
33
+
34
+ // src/index.ts
35
+ var import_axios = __toModule(require("axios"));
36
+ var import_cheerio2 = __toModule(require("cheerio"));
37
+
38
+ // src/error.ts
39
// Human-readable error messages, looked up by numeric status code.
var errObj = {
  // Message text means "failed to parse the content".
  400: "\u5185\u5BB9\u89E3\u6790\u5931\u8D25"
};
42
+
43
+ // src/type.ts
44
// Status codes used in results, with a reverse (code -> name) lookup on the
// same object, i.e. the shape of a compiled TypeScript numeric enum.
var Status;
(function (codes) {
  codes.Success = 200;
  codes[200] = "Success";
  codes.Fail = 400;
  codes[400] = "Fail";
})(Status || (Status = {}));
49
+
50
+ // src/turndownCode.ts
51
+ var import_turndown = __toModule(require("turndown"));
52
+ var import_turndown_plugin_gfm = __toModule(require("turndown-plugin-gfm"));
53
+
54
+ // src/formatHtml.ts
55
+ var import_cheerio = __toModule(require("cheerio"));
56
/**
 * Extracts the plain-text source code from the inner HTML of a code block.
 *
 * Line breaks expressed as `<br>` are turned into newlines, then every
 * remaining tag is stripped by loading the markup into cheerio and reading
 * its text content.
 *
 * @param {string} htmlStr Inner HTML of the code block.
 * @returns {string} The code as plain text.
 */
function formatCode(htmlStr) {
  const markup = htmlStr
    // <br> carries the line structure of the code; it must become "\n"
    // before tags are stripped. Accept <br>, <br/> and <br />.
    .replace(/<br\s*\/?>/gi, "\n")
    // Use a regular space rather than U+00A0 so the code stays copy-pasteable.
    .replace(/&nbsp;/gi, " ")
    // &times; and &divide; are mapped to the ASCII characters `*` and `%`.
    .replace(/&times;/gi, "*")
    .replace(/&divide;/gi, "%");
  // Do NOT decode &lt; / &gt; / &amp; / &quot; / &apos; by hand before parsing:
  // a decoded "<div>" inside a code sample would be parsed as a real element
  // and dropped by text(), and "&amp;lt;" would be decoded twice. The HTML
  // parser decodes these entities exactly once when producing the text.
  const $ = import_cheerio.default.load(markup);
  return $.text();
}
70
/**
 * Converts the inner HTML of a `<figure>` element into a Markdown image.
 *
 * The image URL is read from the `data-src` attribute of the first `<img>`;
 * the alt text is the text of the first `<figcaption>`, if any.
 *
 * @param {string} figureHTML Inner HTML of the figure element.
 * @returns {string|undefined} Markdown image surrounded by blank lines, or
 *   `undefined` when the figure has no `<img>` with a non-empty `data-src`.
 */
function figure2markdown(figureHTML) {
  // The back-reference makes the closing quote match the opening one, so a
  // URL containing the other quote character is not truncated.
  const imgMatch = figureHTML.match(/<img\b[^>]*?\bdata-src=(["'])(.*?)\1/i);
  if (!imgMatch || !imgMatch[2]) {
    return;
  }
  // Attributes on <figcaption> are optional, and the caption may span lines.
  const captionMatch = figureHTML.match(/<figcaption\b[^>]*>([\s\S]*?)<\/figcaption>/i);
  // The caption may wrap its text in inline tags (or nest the image itself);
  // keep only the text and collapse whitespace so the alt text is one line.
  const desc = captionMatch
    ? captionMatch[1].replace(/<[^>]*>/g, "").replace(/\s+/g, " ").trim()
    : "";
  return `\n\n ![${desc}](${imgMatch[2]}) \n\n`;
}
92
+
93
+ // src/turndownCode.ts
94
// Shared HTML -> Markdown converter: fenced code blocks, empty string for
// <hr>, GFM plugin enabled, plus four custom rules registered below.
var turndownService = new import_turndown.default({
  codeBlockStyle: "fenced",
  hr: ""
});
turndownService.use(import_turndown_plugin_gfm.gfm);

// <pre>: always emit a fenced block. When the already-converted content starts
// and ends with a backtick, rebuild the code text from the raw inner HTML.
turndownService.addRule("pre2Code", {
  filter: ["pre"],
  replacement(content, node) {
    const wrappedInBackticks = content.startsWith("`") && content.endsWith("`");
    const body = wrappedInBackticks ? formatCode(node.innerHTML) : content;
    return "```\n" + body + "\n```\n";
  }
});

// <img>: the URL is taken from `data-src`; images without it are dropped.
turndownService.addRule("getImage", {
  filter: ["img"],
  replacement(content, node) {
    const src = node.getAttribute("data-src") || "";
    if (!src) {
      return "";
    }
    return `\n\n ![](${src}) \n\n`;
  }
});

// <br>: a bare newline.
turndownService.addRule("lineBreaks", {
  filter: "br",
  replacement() {
    return "\n";
  }
});

// <figure>: image plus optional caption, or nothing when no image is found.
turndownService.addRule("img2Code", {
  filter: ["figure"],
  replacement(content, node) {
    return figure2markdown(node.innerHTML) || "";
  }
});
131
+
132
+ // src/index.ts
133
/**
 * Builds a failure result for the given status code.
 *
 * @param {number} code Status code; also the key used to look up the message in `errObj`.
 * @returns {{code: number, success: boolean, msg: string|undefined}} Failure result.
 */
var getError = (code) => ({
  code,
  success: false,
  msg: errObj[code]
});
140
/**
 * Downloads the page at `url` and converts its article body to Markdown.
 *
 * The title is read from `#activity-name`, the author from `#js_name` and the
 * body from `#js_content`. The returned Markdown starts with a title heading
 * and an author heading, followed by the converted body.
 *
 * This function does not reject on request or conversion errors: every failure
 * is reported as a result object with `success: false`.
 *
 * @param {string} url Address of the article page.
 * @returns {Promise<{success: boolean, code: number, data?: {title?: string, author?: string, content?: string}, msg?: string}>}
 */
async function transformHtml2Markdown(url) {
  try {
    const res = await import_axios.default.request({
      url,
      method: "get",
      timeout: 3e4,
      // Keep the raw response body; it is parsed as HTML below.
      transformResponse(raw) {
        return raw;
      }
    });
    const $ = import_cheerio2.default.load(res["data"]);
    const title = $("#activity-name").text().trim();
    // The author text is split into lines, trimmed, and de-duplicated.
    const authorLines = $("#js_name").text().split("\n").map((item) => item.trim()).filter(Boolean);
    const author = Array.from(new Set(authorLines)).join("\n");
    const html = $("#js_content").html();
    if (!html) {
      return getError(Status.Fail);
    }
    const body = turndownService.turndown(html);
    return {
      success: true,
      code: Status.Success,
      data: {
        title,
        author,
        content: `## ${title} \n \n## \u4F5C\u8005 ${author} \n \n` + body
      }
    };
  } catch (err) {
    console.log(err);
    // Return a well-formed failure result instead of the raw error object, so
    // callers can rely on `success` / `code` / `msg` being present.
    const failure = getError(Status.Fail);
    if (err && err.message) {
      failure.msg = err.message;
    }
    return failure;
  }
}
178
+ // Annotate the CommonJS export names for ESM import in node:
179
+ 0 && (module.exports = {
180
+ Status
181
+ });
182
+ //# sourceMappingURL=index.cjs.map
@@ -0,0 +1,7 @@
1
+ {
2
+ "version": 3,
3
+ "sources": ["../src/index.ts", "../node_modules/tsup/assets/cjs_shims.js", "../src/error.ts", "../src/type.ts", "../src/turndownCode.ts", "../src/formatHtml.ts"],
4
+ "sourcesContent": ["import axios from 'axios'\nimport cheerio from 'cheerio'\nimport { errObj } from './error'\nimport { TurnDownResult, Status } from './type'\nimport { turndownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport default async function transformHtml2Markdown(\n url: string\n): Promise<TurnDownResult> {\n let json: TurnDownResult = await axios\n .request({\n url,\n method: 'get',\n timeout: 30000,\n transformResponse(res) {\n return res\n },\n })\n .then((res) => {\n const $ = cheerio.load(res['data'])\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n\n const author = Array.from(\n new Set(\n $('#js_name')\n .text()\n .split('\\n')\n .map((item) => item.trim())\n .filter(Boolean)\n )\n ).join('\\n')\n\n const html = $('#js_content').html()\n\n if (html && html.length > 0) {\n let res = turndownService.turndown(html)\n\n res = `## ${title} \\n \\n` + `## \u4F5C\u8005 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n })\n .catch((err) => {\n console.log(err)\n return err\n })\n\n return json\n}\n", "export const importMetaUrlShim =\n typeof document === 'undefined'\n ? 
new (require('u' + 'rl').URL)('file:' + __filename).href\n : (document.currentScript && document.currentScript.src) ||\n new URL('main.js', document.baseURI).href\n", "export const errObj: {\n [key: number]: string\n} = {\n '400': '\u5185\u5BB9\u89E3\u6790\u5931\u8D25',\n}\n", "export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n", "/**\n * html \u8F6C\u6362 markdown \u683C\u5F0F\n */\nimport turnDownService from 'turndown'\nimport { gfm } from 'turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\nconst turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n})\n\nturndownService.use(gfm)\n\n// \u81EA\u5B9A\u4E49\u914D\u7F6E\nturndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // \u5FAE\u4FE1\u6587\u7AE0\u83B7\u53D6\u5230\u7684 content\uFF0C \u4F1A\u51FA\u73B0\u9996\u5C3E\u90FD\u6709 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n ![](${src}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\nexport { turndownService }\n", "import cheerio from 'cheerio'\n\n/**\n * \u5FAE\u4FE1\u4E0D\u540C\u4EE3\u7801\u98CE\u683C\n * 1. <code><span>code</span></code>\n * 2. 
<code><span><span>123</span><br></span></code>\n * turndown \u4E0D\u89E3\u6790 code \u4E0B\u7684 br \u6807\u7B7E\uFF0C\u9700\u8981\u4F7F\u7528\u6B63\u5219\u66FF\u6362 br \u6807\u7B7E\u4E3A \\n \u624D\u53EF\u4EE5\u7EE7\u7EED\u89E3\u6790\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '\u2018')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * \u89E3\u51B3\u5982\u4E0B\u683C\u5F0F\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img \u53EF\u80FD\u6CA1\u6709\u56FE\u7247\u8BF4\u660E\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],
5
+ "mappings": ";;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;;;ACAO,IAAM,oBACX,OAAO,aAAa,cAChB,IAAK,SAAQ,QAAY,IAAK,UAAU,YAAY,OACnD,SAAS,iBAAiB,SAAS,cAAc,OAClD,IAAI,IAAI,WAAW,SAAS,SAAS;;;ADJ3C,mBAAkB;AAClB,sBAAoB;;;AEDb,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB,sBAA4B;AAC5B,iCAAoB;;;ACJpB,qBAAoB;AAWb,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,uBAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD9DJ,IAAM,kBAAkB,IAAI,wBAAgB;AAAA,EACxC,gBAAgB;AAAA,EAChB,IAAI;AAAA;AAGR,gBAAgB,IAAI;AAGpB,gBACK,QAAQ,YAAY;AAAA,EACjB,QAAQ,CAAC;AAAA,EACT,YAAY,SAAS,MAAW;AAC5B,UAAM,MAAM,QAAQ;AAEpB,UAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,QAAI,eAAe;AAEnB,QAAI,QAAQ;AACR,qBAAe,WAAW,KAAK;AAAA;AAGnC,UAAM,MAAM,SAAS,eAAe;AAEpC,WAAO,UAAU,MAAM;AAAA;AAAA,GAG9B,QAAQ,YAAY;AAAA,EACjB,QAAQ,CAAC;AAAA,EACT,YAAY,SAAS,MAAW;AAC5B,UAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,WAAO,MAAM;AAAA;AAAA,OAAY;AAAA;AAAA,IAAc;AAAA;AAAA,GAG9C,QAAQ,cAAc;AAAA,EACnB,QAAQ;AAAA,EACR,aAAa,MAAM;AAAA,GAEtB,QAAQ,YAAY;AAAA,EACjB,QAAQ,CAAC;AAAA,EACT,YAAY,SAAS,MAAW;AAC5B,UAAM,MAAM,gBAAgB,KAAK;AACjC,WAAO,OAAO;AAAA;AAAA;;;AJ5C1B,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA;AAAA;AAMpB,sCACI,KACuB;AACvB,MAAI,OAAuB,MAAM,qBAC5B,QAAQ;AAAA,IACL;AAAA,IACA,QAAQ;AAAA,IACR,SAAS;AAAA,IACT,kBAAkB,KAAK;AACnB,aAAO;AAAA;AAAA,KAGd,KAAK,CAAC,QAAQ;AACX,UAAM,IAAI,wBAAQ,KAAK,IAAI;AAE3B,QAAI,QAAQ,EAAE,kBAAkB;AAEhC,YAAQ,MAAM,UAAU;AAExB,UAAM,SAAS,MAAM,KACjB,IAAI,IACA
,EAAE,YACG,OACA,MAAM,MACN,IAAI,CAAC,SAAS,KAAK,QACnB,OAAO,WAElB,KAAK;AAEP,UAAM,OAAO,EAAE,eAAe;AAE9B,QAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,UAAI,OAAM,gBAAgB,SAAS;AAEnC,aAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,aAAO;AAAA,QACH,SAAS;AAAA,QACT,MAAM,OAAO;AAAA,QACb,MAAM;AAAA,UACF;AAAA,UACA;AAAA,UACA,SAAS;AAAA;AAAA;AAAA;AAKrB,WAAO,SAAS,OAAO;AAAA,KAE1B,MAAM,CAAC,QAAQ;AACZ,YAAQ,IAAI;AACZ,WAAO;AAAA;AAGf,SAAO;AAAA;",
6
+ "names": []
7
+ }
@@ -0,0 +1,18 @@
1
+ interface TurnDownResult {
2
+ success: boolean;
3
+ code: number;
4
+ data?: {
5
+ title?: string;
6
+ author?: string;
7
+ content?: string;
8
+ };
9
+ msg?: string;
10
+ }
11
+ declare const enum Status {
12
+ Success = 200,
13
+ Fail = 400
14
+ }
15
+
16
+ declare function transformHtml2Markdown(url: string): Promise<TurnDownResult>;
17
+
18
+ export { Status, TurnDownResult, transformHtml2Markdown as default };
package/dist/index.js ADDED
@@ -0,0 +1,149 @@
1
+ // src/index.ts
2
+ import axios from "axios";
3
+ import cheerio2 from "cheerio";
4
+
5
+ // src/error.ts
6
// Human-readable error messages, looked up by numeric status code.
var errObj = {
  // Message text means "failed to parse the content".
  400: "\u5185\u5BB9\u89E3\u6790\u5931\u8D25"
};
9
+
10
+ // src/type.ts
11
// Status codes used in results, with a reverse (code -> name) lookup on the
// same object, i.e. the shape of a compiled TypeScript numeric enum.
var Status;
(function (codes) {
  codes.Success = 200;
  codes[200] = "Success";
  codes.Fail = 400;
  codes[400] = "Fail";
})(Status || (Status = {}));
16
+
17
+ // src/turndownCode.ts
18
+ import turnDownService from "turndown";
19
+ import { gfm } from "turndown-plugin-gfm";
20
+
21
+ // src/formatHtml.ts
22
+ import cheerio from "cheerio";
23
/**
 * Extracts the plain-text source code from the inner HTML of a code block.
 *
 * Line breaks expressed as `<br>` are turned into newlines, then every
 * remaining tag is stripped by loading the markup into cheerio and reading
 * its text content.
 *
 * @param {string} htmlStr Inner HTML of the code block.
 * @returns {string} The code as plain text.
 */
function formatCode(htmlStr) {
  const markup = htmlStr
    // <br> carries the line structure of the code; it must become "\n"
    // before tags are stripped. Accept <br>, <br/> and <br />.
    .replace(/<br\s*\/?>/gi, "\n")
    // Use a regular space rather than U+00A0 so the code stays copy-pasteable.
    .replace(/&nbsp;/gi, " ")
    // &times; and &divide; are mapped to the ASCII characters `*` and `%`.
    .replace(/&times;/gi, "*")
    .replace(/&divide;/gi, "%");
  // Do NOT decode &lt; / &gt; / &amp; / &quot; / &apos; by hand before parsing:
  // a decoded "<div>" inside a code sample would be parsed as a real element
  // and dropped by text(), and "&amp;lt;" would be decoded twice. The HTML
  // parser decodes these entities exactly once when producing the text.
  const $ = cheerio.load(markup);
  return $.text();
}
37
/**
 * Converts the inner HTML of a `<figure>` element into a Markdown image.
 *
 * The image URL is read from the `data-src` attribute of the first `<img>`;
 * the alt text is the text of the first `<figcaption>`, if any.
 *
 * @param {string} figureHTML Inner HTML of the figure element.
 * @returns {string|undefined} Markdown image surrounded by blank lines, or
 *   `undefined` when the figure has no `<img>` with a non-empty `data-src`.
 */
function figure2markdown(figureHTML) {
  // The back-reference makes the closing quote match the opening one, so a
  // URL containing the other quote character is not truncated.
  const imgMatch = figureHTML.match(/<img\b[^>]*?\bdata-src=(["'])(.*?)\1/i);
  if (!imgMatch || !imgMatch[2]) {
    return;
  }
  // Attributes on <figcaption> are optional, and the caption may span lines.
  const captionMatch = figureHTML.match(/<figcaption\b[^>]*>([\s\S]*?)<\/figcaption>/i);
  // The caption may wrap its text in inline tags (or nest the image itself);
  // keep only the text and collapse whitespace so the alt text is one line.
  const desc = captionMatch
    ? captionMatch[1].replace(/<[^>]*>/g, "").replace(/\s+/g, " ").trim()
    : "";
  return `\n\n ![${desc}](${imgMatch[2]}) \n\n`;
}
59
+
60
+ // src/turndownCode.ts
61
// Shared HTML -> Markdown converter: fenced code blocks, empty string for
// <hr>, GFM plugin enabled, plus four custom rules registered below.
var turndownService = new turnDownService({
  codeBlockStyle: "fenced",
  hr: ""
});
turndownService.use(gfm);

// <pre>: always emit a fenced block. When the already-converted content starts
// and ends with a backtick, rebuild the code text from the raw inner HTML.
turndownService.addRule("pre2Code", {
  filter: ["pre"],
  replacement(content, node) {
    const wrappedInBackticks = content.startsWith("`") && content.endsWith("`");
    const body = wrappedInBackticks ? formatCode(node.innerHTML) : content;
    return "```\n" + body + "\n```\n";
  }
});

// <img>: the URL is taken from `data-src`; images without it are dropped.
turndownService.addRule("getImage", {
  filter: ["img"],
  replacement(content, node) {
    const src = node.getAttribute("data-src") || "";
    if (!src) {
      return "";
    }
    return `\n\n ![](${src}) \n\n`;
  }
});

// <br>: a bare newline.
turndownService.addRule("lineBreaks", {
  filter: "br",
  replacement() {
    return "\n";
  }
});

// <figure>: image plus optional caption, or nothing when no image is found.
turndownService.addRule("img2Code", {
  filter: ["figure"],
  replacement(content, node) {
    return figure2markdown(node.innerHTML) || "";
  }
});
98
+
99
+ // src/index.ts
100
/**
 * Builds a failure result for the given status code.
 *
 * @param {number} code Status code; also the key used to look up the message in `errObj`.
 * @returns {{code: number, success: boolean, msg: string|undefined}} Failure result.
 */
var getError = (code) => ({
  code,
  success: false,
  msg: errObj[code]
});
107
/**
 * Downloads the page at `url` and converts its article body to Markdown.
 *
 * The title is read from `#activity-name`, the author from `#js_name` and the
 * body from `#js_content`. The returned Markdown starts with a title heading
 * and an author heading, followed by the converted body.
 *
 * This function does not reject on request or conversion errors: every failure
 * is reported as a result object with `success: false`.
 *
 * @param {string} url Address of the article page.
 * @returns {Promise<{success: boolean, code: number, data?: {title?: string, author?: string, content?: string}, msg?: string}>}
 */
async function transformHtml2Markdown(url) {
  try {
    const res = await axios.request({
      url,
      method: "get",
      timeout: 3e4,
      // Keep the raw response body; it is parsed as HTML below.
      transformResponse(raw) {
        return raw;
      }
    });
    const $ = cheerio2.load(res["data"]);
    const title = $("#activity-name").text().trim();
    // The author text is split into lines, trimmed, and de-duplicated.
    const authorLines = $("#js_name").text().split("\n").map((item) => item.trim()).filter(Boolean);
    const author = Array.from(new Set(authorLines)).join("\n");
    const html = $("#js_content").html();
    if (!html) {
      return getError(Status.Fail);
    }
    const body = turndownService.turndown(html);
    return {
      success: true,
      code: Status.Success,
      data: {
        title,
        author,
        content: `## ${title} \n \n## \u4F5C\u8005 ${author} \n \n` + body
      }
    };
  } catch (err) {
    console.log(err);
    // Return a well-formed failure result instead of the raw error object, so
    // callers can rely on `success` / `code` / `msg` being present.
    const failure = getError(Status.Fail);
    if (err && err.message) {
      failure.msg = err.message;
    }
    return failure;
  }
}
145
+ export {
146
+ Status,
147
+ transformHtml2Markdown as default
148
+ };
149
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,7 @@
1
+ {
2
+ "version": 3,
3
+ "sources": ["../src/index.ts", "../src/error.ts", "../src/type.ts", "../src/turndownCode.ts", "../src/formatHtml.ts"],
4
+ "sourcesContent": ["import axios from 'axios'\nimport cheerio from 'cheerio'\nimport { errObj } from './error'\nimport { TurnDownResult, Status } from './type'\nimport { turndownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport default async function transformHtml2Markdown(\n url: string\n): Promise<TurnDownResult> {\n let json: TurnDownResult = await axios\n .request({\n url,\n method: 'get',\n timeout: 30000,\n transformResponse(res) {\n return res\n },\n })\n .then((res) => {\n const $ = cheerio.load(res['data'])\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n\n const author = Array.from(\n new Set(\n $('#js_name')\n .text()\n .split('\\n')\n .map((item) => item.trim())\n .filter(Boolean)\n )\n ).join('\\n')\n\n const html = $('#js_content').html()\n\n if (html && html.length > 0) {\n let res = turndownService.turndown(html)\n\n res = `## ${title} \\n \\n` + `## \u4F5C\u8005 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n })\n .catch((err) => {\n console.log(err)\n return err\n })\n\n return json\n}\n", "export const errObj: {\n [key: number]: string\n} = {\n '400': '\u5185\u5BB9\u89E3\u6790\u5931\u8D25',\n}\n", "export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n", "/**\n * html \u8F6C\u6362 markdown \u683C\u5F0F\n */\nimport turnDownService from 'turndown'\nimport { gfm } from 'turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\nconst turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n})\n\nturndownService.use(gfm)\n\n// 
\u81EA\u5B9A\u4E49\u914D\u7F6E\nturndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // \u5FAE\u4FE1\u6587\u7AE0\u83B7\u53D6\u5230\u7684 content\uFF0C \u4F1A\u51FA\u73B0\u9996\u5C3E\u90FD\u6709 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n ![](${src}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\nexport { turndownService }\n", "import cheerio from 'cheerio'\n\n/**\n * \u5FAE\u4FE1\u4E0D\u540C\u4EE3\u7801\u98CE\u683C\n * 1. <code><span>code</span></code>\n * 2. 
<code><span><span>123</span><br></span></code>\n * turndown \u4E0D\u89E3\u6790 code \u4E0B\u7684 br \u6807\u7B7E\uFF0C\u9700\u8981\u4F7F\u7528\u6B63\u5219\u66FF\u6362 br \u6807\u7B7E\u4E3A \\n \u624D\u53EF\u4EE5\u7EE7\u7EED\u89E3\u6790\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '\u2018')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * \u89E3\u51B3\u5982\u4E0B\u683C\u5F0F\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img \u53EF\u80FD\u6CA1\u6709\u56FE\u7247\u8BF4\u660E\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],
5
+ "mappings": ";AAAA;AACA;;;ACDO,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB;AACA;;;ACJA;AAWO,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,QAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD9DJ,IAAM,kBAAkB,IAAI,gBAAgB;AAAA,EACxC,gBAAgB;AAAA,EAChB,IAAI;AAAA;AAGR,gBAAgB,IAAI;AAGpB,gBACK,QAAQ,YAAY;AAAA,EACjB,QAAQ,CAAC;AAAA,EACT,YAAY,SAAS,MAAW;AAC5B,UAAM,MAAM,QAAQ;AAEpB,UAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,QAAI,eAAe;AAEnB,QAAI,QAAQ;AACR,qBAAe,WAAW,KAAK;AAAA;AAGnC,UAAM,MAAM,SAAS,eAAe;AAEpC,WAAO,UAAU,MAAM;AAAA;AAAA,GAG9B,QAAQ,YAAY;AAAA,EACjB,QAAQ,CAAC;AAAA,EACT,YAAY,SAAS,MAAW;AAC5B,UAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,WAAO,MAAM;AAAA;AAAA,OAAY;AAAA;AAAA,IAAc;AAAA;AAAA,GAG9C,QAAQ,cAAc;AAAA,EACnB,QAAQ;AAAA,EACR,aAAa,MAAM;AAAA,GAEtB,QAAQ,YAAY;AAAA,EACjB,QAAQ,CAAC;AAAA,EACT,YAAY,SAAS,MAAW;AAC5B,UAAM,MAAM,gBAAgB,KAAK;AACjC,WAAO,OAAO;AAAA;AAAA;;;AH5C1B,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA;AAAA;AAMpB,sCACI,KACuB;AACvB,MAAI,OAAuB,MAAM,MAC5B,QAAQ;AAAA,IACL;AAAA,IACA,QAAQ;AAAA,IACR,SAAS;AAAA,IACT,kBAAkB,KAAK;AACnB,aAAO;AAAA;AAAA,KAGd,KAAK,CAAC,QAAQ;AACX,UAAM,IAAI,SAAQ,KAAK,IAAI;AAE3B,QAAI,QAAQ,EAAE,kBAAkB;AAEhC,YAAQ,MAAM,UAAU;AAExB,UAAM,SAAS,MAAM,KACjB,IAAI,IACA,EAAE,YACG,OACA,MAAM,MACN,IAAI,CAAC,SAAS,KAAK,QACnB,OAAO,WAElB,KAAK;AAEP,UAAM,OAAO,EAAE,eAAe;AAE9B,QAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,UAAI,OAAM,gBAAgB,SAAS;AAEnC,aAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,aAAO
;AAAA,QACH,SAAS;AAAA,QACT,MAAM,OAAO;AAAA,QACb,MAAM;AAAA,UACF;AAAA,UACA;AAAA,UACA,SAAS;AAAA;AAAA;AAAA;AAKrB,WAAO,SAAS,OAAO;AAAA,KAE1B,MAAM,CAAC,QAAQ;AACZ,YAAQ,IAAI;AACZ,WAAO;AAAA;AAGf,SAAO;AAAA;",
6
+ "names": []
7
+ }
package/package.json ADDED
@@ -0,0 +1,66 @@
1
+ {
2
+ "name": "@aiello/wechat-to-markdown",
3
+ "version": "1.2.3",
4
+ "description": "解析微信文章 URL 为 markdown",
5
+ "author": "Aiello Chan<aiello.chan@gmail.com>",
6
+ "keywords": [
7
+ "wechat to markdown",
8
+ "markdown"
9
+ ],
10
+ "main": "dist/index.cjs",
11
+ "module": "dist/index.js",
12
+ "types": "dist/index.d.ts",
13
+ "files": [
14
+ "dist"
15
+ ],
16
+ "type": "module",
17
+ "license": "MIT",
18
+ "devDependencies": {
19
+ "@ryan-liu/eslint-config-jsx-config": "^1.0.2",
20
+ "@types/fs-extra": "^9.0.13",
21
+ "@types/turndown": "^5.0.1",
22
+ "cross-spawn": "^7.0.3",
23
+ "eslint": "^7.32.0",
24
+ "husky": "^7.0.2",
25
+ "inquirer": "^8.2.0",
26
+ "lint-staged": "^11.1.2",
27
+ "prettier": "^2.4.1",
28
+ "tsup": "^5.4.0",
29
+ "typescript": "^4.4.3",
30
+ "vitest": "^1.3.1"
31
+ },
32
+ "scripts": {
33
+ "dev": "yarn build --watch",
34
+ "build": "tsup src/index.ts --dts --format cjs,esm",
35
+ "lint": "lint-staged",
36
+ "pub": "node tools/publish.js",
37
+ "test": "vitest"
38
+ },
39
+ "husky": {
40
+ "hooks": {
41
+ "pre-commit": "lint-staged"
42
+ }
43
+ },
44
+ "lint-staged": {
45
+ "*.{ts,js}": [
46
+ "eslint --fix --quiet"
47
+ ],
48
+ "*.json": [
49
+ "prettier --write"
50
+ ]
51
+ },
52
+ "tsup": {
53
+ "splitting": false,
54
+ "sourcemap": true,
55
+ "clean": true,
56
+ "external": [
57
+ "html2markdown"
58
+ ]
59
+ },
60
+ "dependencies": {
61
+ "axios": "^0.22.0",
62
+ "cheerio": "^1.0.0-rc.10",
63
+ "turndown": "^7.1.1",
64
+ "turndown-plugin-gfm": "^1.0.2"
65
+ }
66
+ }